import pandas as pd
import numpy as np
# Merge the per-province tables that preprocessing wrote out separately.
data1 = pd.read_excel('广西.xlsx')
data2 = pd.read_excel('湖北.xlsx')
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and keeps the original row index of each table, as append did.
data3 = pd.concat([data1, data2])

# Keep only compound fertilizers. The explicit copy makes data4 an independent
# frame, so the column assignments below do not write into a slice of data3
# (which triggers SettingWithCopyWarning).
data4 = data3[data3['产品通用名称'] == '复混肥料'].copy()

# The previous astype(int) call discarded its result, and an integer cast would
# have truncated these fractional values anyway. Coerce to a numeric dtype
# instead so that pd.cut receives numbers; non-numeric cells raise here.
data4['总无机养分百分比'] = pd.to_numeric(data4['总无机养分百分比'])
print(data4['总无机养分百分比'].max())
print(data4['总无机养分百分比'].min())

# Ten equal-width bins (width 0.072) covering 0 .. 0.72.
bins = [0, 0.072, 0.144, 0.216, 0.288, 0.36, 0.432, 0.504, 0.576, 0.648, 0.72]
# Ordinal labels '1'..'10' and interval labels '[lo,hi]', both derived from
# `bins` so they cannot drift out of sync with the edges.
labels1 = [str(i) for i in range(1, len(bins))]
labels2 = ['[{},{}]'.format(lo, hi) for lo, hi in zip(bins, bins[1:])]

# Split all compound fertilizers into the 10 equal-width groups and tag each
# row. include_lowest=True puts a value of exactly 0 into the first bin, which
# is what the '[0,0.072]' label promises; without it such rows become NaN.
data4['标签'] = pd.cut(data4['总无机养分百分比'], bins, labels=labels1,
                     include_lowest=True)
data4['分层'] = pd.cut(data4['总无机养分百分比'], bins, labels=labels2,
                     include_lowest=True)
print(data4['分层'].value_counts())
data4.to_excel('result2_1.xlsx')

# Count the rows in each bin. Passing a renaming dict to SeriesGroupBy.agg
# raises SpecificationError on pandas >= 1.0, so count with size() and name the
# resulting column explicitly. observed=False keeps empty bins (count 0) in the
# result so that every interval still appears on the chart.
aggResult = (
    data4.groupby('分层', observed=False)['总无机养分百分比']
    .size()
    .to_frame('总无机养分百分比')
)
# Express the counts as whole-number percentages of the total. Scaling before
# rounding avoids float artifacts such as 28.999999999999996 that
# round(x, 2) * 100 produces.
PaggResult = (aggResult / aggResult.sum() * 100).round()
# Draw the frequency distribution of registered products as a bar chart.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
PaggResult['总无机养分百分比'].plot.bar(ax=ax, width=1.0, fontsize=10)

# Let matplotlib render Chinese text and the minus sign correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],   # SimHei provides the Chinese glyphs
    'axes.unicode_minus': False,     # draw '-' as an ASCII hyphen
})

ax.set_title('产品登记数量频率分布直方图', fontsize=20)

# Fixed y axis ticks every 5 units from 0 to 35, labelled '%0' .. '%35'.
y_ticks = list(range(0, 40, 5))
ax.set_yticks(y_ticks)
ax.set_yticklabels(['%{}'.format(tick) for tick in y_ticks])

ax.legend(labels=['分布频率'])
plt.show()